home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The World of Computer Software
/
The World of Computer Software.iso
/
psd.zip
/
READ.SCM
< prev
next >
Wrap
Text File
|
1992-07-09
|
17KB
|
654 lines
;;;;
;;;; read.scm 1.17
;;;;
;;;; psd -- a portable Scheme debugger, version 1.0
;;;; Copyright (C) 1992 Pertti Kellomaki, pk@cs.tut.fi
;;;; This program is free software; you can redistribute it and/or modify
;;;; it under the terms of the GNU General Public License as published by
;;;; the Free Software Foundation; either version 1, or (at your option)
;;;; any later version.
;;;; This program is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;;; GNU General Public License for more details.
;;;; You should have received a copy of the GNU General Public License
;;;; along with this program; if not, write to the Free Software
;;;; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
;;;; See file COPYING in the psd distribution.
;;;;
;;;; Written by Pertti Kellomaki, pk@cs.tut.fi
;;;;
;;;; This file contains the reader for psd. We can not use plain read,
;;;; because we want to know where in a file we are. The reader
;;;; returns a pexp, which is a sexp with position information.
;;;;
;;;----------------------------------------------------------------------
;;; modification: egb (edward briggs (briggs@getoff.dec.com)) added support
;; for binary, octal and hex numbers. (e.g. #b0101, #o77, #xa0).
;; 1) added predicates digit-2? digit-8? digit-16?
;; 2) added routines read-hex-number, read binary-number, and
;; read-octal-number
;; 3) added lines to read-hashed-token to find these numbers
;;
;;----------------------------------------------------------------------
;;; Current position in the source file. These are updated from
;;; elsewhere. Not nice, should do it some other way.
(define *psd-source-line-number* 1)
(define *psd-source-char-position* 1)
;;; In order to save space, path names are stored as integers in the
;;; instrumented file. psd-path->index and psd-index->path do the
;;; conversion.
(define psd-path->index #f)
(define psd-index->path #f)
(let ((path-names '())
(count -1))
(set! psd-path->index
(lambda (str)
(let ((result (assoc str path-names)))
(if (not result)
(begin
(set! count (+ count 1))
(set! path-names
`((,count . ,str)
(,str . ,count)
,@path-names))
count)
(cdr result)))))
(set! psd-index->path
(lambda (index)
(cdr (assoc index path-names)))))
;;;
;;; Read an expression from the port, and tag it with the given source
;;; file name and position information.
(define psd-read
(let ((+ +) (- -) (= =) (boolean? boolean?) (caddr caddr) (cadr cadr)
(car car) (cddr cddr) (cdr cdr)
(char-whitespace? char-whitespace?) (char=? char=?)
(char? char?) (cons cons) (eof-object? eof-object?)
(eq? eq?) (equal? equal?) (error error)
(length length) (list list) (list->string list->string)
(member member) (not not) (null? null?) (number? number?)
(peek-char peek-char) (read read) (read-char read-char)
(reverse reverse) (string->number string->number)
(string->symbol string->symbol) (string-append string-append)
(string-ci=? string-ci=?) (string? string?) (symbol? symbol?))
(lambda (port source-file-name)
;;;
;;; Read a character and update position.
;;;
(define (get-char)
(let ((char (read-char port)))
(cond ((eof-object? char) char)
(else
(case char
((#\newline)
(set! *psd-source-char-position* 0)
(set! *psd-source-line-number* (+ *psd-source-line-number* 1)))
(else
(set! *psd-source-char-position* (+ *psd-source-char-position* 1))))
char))))
;;;
;;; Look at the next character.
;;;
(define (next-char) (peek-char port))
;;;
;;; Is the next character one of the given ones?
;;;
(define (next? . chars)
(member (next-char) chars))
;;;
;;; Build a list describing the current position
;;;
(define (current-position)
(list (psd-path->index source-file-name)
*psd-source-line-number*
*psd-source-char-position*))
;;;
;;; Tokens. The starting and ending positions are supplied with
;;; each token.
;;;
(define (make-token start end contents) (list start end contents))
(define (token-start tok) (car tok))
(define (token-end tok) (cadr tok))
(define (token-contents tok) (caddr tok))
;;;
;;; These are used for some special tokens.
;;;
(define left-paren '(left-paren))
(define right-paren '(right-paren))
(define vector-start '(vector-start))
(define dot '(dot))
(define quote-token '(quote))
(define quasiquote-token '(quasiquote))
(define unquote-token '(unquote))
(define unquote-splicing-token '(unquote-splicing))
(define line-directive-token '(line-directive))
;;;
;;; Classify characters. See R4RS Formal syntax (7.1)
;;;
(define (letter? c)
(member c '(#\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l #\m #\n
#\o #\p #\q #\r #\s #\t #\u #\v #\w #\x #\y #\z #\A
#\B #\C #\D #\E #\F #\G #\H #\I #\J #\K #\L #\M #\N
#\O #\P #\Q #\R #\S #\T #\U #\V #\W #\X #\Y #\Z)))
(define (special-initial? c)
(member c '(#\! #\$ #\% #\& #\* #\/ #\: #\< #\= #\> #\? #\~ #\_ #\^)))
(define (initial? c)
(or (letter? c) (special-initial? c)))
(define (digit? c)
(member c '(#\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\0)))
(define (digit-2? c)
(member c '(#\0 #\1)))
(define (digit-8? c)
(member c '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7)))
(define (digit-16? c)
(member c '(#\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\0 #\a #\b
#\c #\d #\e #\f #\A #\B #\C #\D #\E #\F)))
(define (special-subsequent? c)
(member c '(#\. #\+ #\- )))
(define (subsequent? c)
(or (initial? c) (digit? c) (special-subsequent? c)))
;;;
;;; Skip white space.
;;;
(define (skip-white-space)
(if (eof-object? (next-char))
#f
(cond
((char-whitespace? (next-char))
(get-char)
(skip-white-space))
((next? #\;)
(let loop ()
(cond ((eof-object? (next-char))
#f)
((next? #\newline)
(skip-white-space))
(else
(get-char)
(loop))))))))
;;;
;;; Read next token.
;;;
(define (read-token)
(skip-white-space)
(if (equal? (next-char) #\#)
;; If it starts with a hash sign, it might be a line
;; directive. In that case, just read the next token.
(let* ((start (current-position))
(contents (read-hashed-token))
(end (current-position)))
(if (eq? contents line-directive-token)
(read-token)
(make-token start end contents)))
(let* ((start (current-position))
(contents
(cond
((eof-object? (next-char))
(get-char))
((initial? (next-char))
(read-identifier))
((next? #\+ #\- #\.)
(maybe-read-peculiar-identifier))
((digit? (next-char))
(read-number))
((next? #\()
(get-char)
left-paren)
((next? #\))
(get-char)
right-paren)
((next? #\')
(get-char)
quote-token)
((next? #\`)
(get-char)
quasiquote-token)
((next? #\,)
(get-char)
(if (next? #\@)
(begin (get-char)
unquote-splicing-token)
unquote-token))
((next? #\")
(read-string))
(else
(error "read-token: bad character " (next-char)))))
(end (current-position)))
(make-token start end contents))))
;;;
;;; Read a string.
;;;
(define (read-string)
(get-char)
(let loop ((result '()))
(cond
((next? #\")
(get-char)
(list->string (reverse result)))
((next? #\\)
(get-char)
(loop (cons (get-char) result)))
(else
(loop (cons (get-char) result))))))
;;;
;;; Read a token starting with a hash sign.
;;;
(define (read-hashed-token)
(get-char)
(cond
((next? #\t)
(get-char)
#t)
((next? #\f)
(get-char)
#f)
((next? #\\)
(read-character))
((or (next? #\x) (next? #\X))
(get-char)
(read-hex-number))
((or (next? #\b) (next? #\B))
(get-char)
(read-binary-number))
((or (next? #\o) (next? #\O))
(get-char)
(read-octal-number))
((next? #\()
(get-char)
vector-start)
;; we return a special token to inform that this was not a real
;; token but a line directive
((next? #\l)
(read-line-directive)
line-directive-token)
(else
(error "read-hashed-token: bad character " (next-char)))))
;;;
;;; Read a line directive, of the form "#line file line column #".
;;; The trailing hash is used for making sure that we don't run past
;;; the end of line. At least scm version 3c8 will read one more trailing
;;; whitespace character than R4RS says it should. In later versions
;;; this is fixed.
;;;
(define (read-line-directive)
(get-char)
(if (next? #\i)
(get-char)
(error "read-line-directive: bad character " (next-char)))
(if (next? #\n)
(get-char)
(error "read-line-directive: bad character " (next-char)))
(if (next? #\e)
(get-char)
(error "read-line-directive: bad character " (next-char)))
;; now we don't have to worry about loosing count where we are,
;; because we are going to read the new position from the file.
(set! source-file-name (read port))
(set! *psd-source-line-number* (read port))
(set! *psd-source-char-position* (read port))
;; the position corresponds to the start of next line
(let loop ((next (read-char port)))
(if (char=? next #\newline)
#f
(loop (read-char port)))))
;;;
;;; Read a character constant.
;;;
(define (read-character)
(get-char)
(let loop ((result (list (get-char))))
(if (letter? (next-char))
(loop (cons (get-char) result))
(cond ((= (length result) 1)
(car result))
(else
(let ((name (list->string (reverse result))))
(cond ((string-ci=? name "space") #\space)
((string-ci=? name "newline") #\newline)
(else (error "read-character: character name not defined in R4RS "
name)))))))))
;;;
;;; Read a vector constant.
;;;
(define (read-vector start-token)
(let loop ((contents '())
(this (internal-read)))
(cond ((eof-object? this)
(error "read-vector: premature end of file"))
((eq? (psd-expr-type this) 'right-paren)
(psd-make-vector (psd-expr-start start-token)
(psd-expr-end this)
(reverse contents)))
(else (loop (cons this contents)
(internal-read))))))
;;;
;;; Read a normal identifier.
;;;
(define (read-identifier)
(let loop ((result (list (get-char))))
(if (subsequent? (next-char))
(loop (cons (get-char) result))
(string->symbol (list->string (reverse result))))))
;;;
;;; Read a peculiar identifier (+ - ... or a single dot)
;;;
(define (maybe-read-peculiar-identifier)
(let ((first (get-char)))
(case first
((#\+)
(if (digit? (next-char))
(read-number)
'+))
((#\-)
(if (digit? (next-char))
(- (read-number))
'-))
((#\.)
(if (next? #\.)
(if (and (get-char)
(next? #\.)
(get-char))
'...
(error "The only identifier that may start with dot is ..."))
dot)))))
;;;
;;; Read a number. Handles only integers and floats without exponents.
;;;
(define (read-number)
(define (read-sign)
(cond ((or (next? #\+)
(next? #\-))
(string (get-char)))
(else "")))
(define (uinteger)
(let loop ((result '()))
(if (or (digit? (next-char))
(next? #\#))
(loop (cons (get-char) result))
(list->string (reverse result)))))
(define (exponent-marker)
(cond ((or (next? #\e)
(next? #\s)
(next? #\f)
(next? #\d)
(next? #\l))
(string (get-char)))
(else "")))
(let* ((sign (read-sign))
(integer-part (uinteger))
(fractional-part
(if (next? #\.)
(begin
(get-char)
(string-append "." (uinteger)))
""))
(marker (exponent-marker))
(exponent
(if (string=? "" marker)
""
(string-append marker(uinteger)))))
(string->number (string-append sign
integer-part
fractional-part
exponent))))
;;;
;;; Support for hex, octal and binary.
;;; Added by egb.
;;;
(define (read-binary-number)
(define (binaryinteger)
(let loop ((result '()))
(if (digit-2? (next-char))
(loop (cons (get-char) result))
(list->string (reverse result)))))
(string->number (string-append "#b" (binaryinteger))))
(define (read-octal-number)
(define (octalinteger)
(let loop ((result '()))
(if (digit-8? (next-char))
(loop (cons (get-char) result))
(list->string (reverse result)))))
(string->number (string-append "#o" (octalinteger))))
(define (read-hex-number)
(define (hexinteger)
(let loop ((result '()))
(if (digit-16? (next-char))
(loop (cons (get-char) result))
(list->string (reverse result)))))
(string->number (string-append "#x" (hexinteger))))
;;;
;;; Read a list up to the ending paren.
;;;
(define (read-list starting-paren)
(define (list->plist lst start end)
(cond
;; end of list
((null? lst)
(psd-make-null start end))
;; dotted pair, there should be exactly one expression after the dot
((eq? (psd-expr-type (car lst)) 'dot)
(cond ((or (null? (cdr lst))
(not (null? (cddr lst))))
(error "Bad dotted pair."))
(else (cadr lst))))
(else
(psd-cons (car lst)
(list->plist (cdr lst)
(if (null? (cdr lst))
end
(psd-expr-start (cadr lst)))
end)
start
end))))
(let loop ((result '())
(this (internal-read)))
(cond
;; the list ended
((eq? (psd-expr-type this) 'right-paren)
(list->plist (reverse result)
(psd-expr-start starting-paren)
(psd-expr-end this)))
;; continue reading
(else
(loop (cons this result)
(internal-read))))))
;;;
;;; The reader proper.
;;;
(define (internal-read)
(let* ((token (read-token))
(contents (token-contents token)))
(cond
((eq? contents left-paren)
(read-list token))
((eq? contents vector-start)
(read-vector token))
((symbol? contents)
(psd-make-symbol
(token-start token)
(token-end token)
contents))
((number? contents)
(psd-make-number
(token-start token)
(token-end token)
contents))
((char? contents)
(psd-make-char
(token-start token)
(token-end token)
contents))
((eq? contents right-paren)
(psd-make-expr 'right-paren
(token-start token)
(token-end token)
contents))
((eq? contents dot)
(psd-make-expr 'dot
(token-start token)
(token-end token)
contents))
((eq? contents quote-token)
(let ((quoted-expr (internal-read)))
(psd-cons (psd-make-symbol (token-start token)
(token-end token)
'quote)
(psd-cons quoted-expr
(psd-make-null (psd-expr-end quoted-expr)
(psd-expr-end quoted-expr))
(psd-expr-start quoted-expr)
(psd-expr-end quoted-expr))
(psd-expr-start quoted-expr)
(psd-expr-end quoted-expr))))
((eq? contents quasiquote-token)
(let ((quasiquoted-expr (internal-read)))
(psd-cons (psd-make-symbol (token-start token)
(token-end token)
'quasiquote)
(psd-cons quasiquoted-expr
(psd-make-null (psd-expr-end quasiquoted-expr)
(psd-expr-end quasiquoted-expr))
(psd-expr-start quasiquoted-expr)
(psd-expr-end quasiquoted-expr))
(psd-expr-start quasiquoted-expr)
(psd-expr-end quasiquoted-expr))))
((eq? contents unquote-token)
(let ((unquoted-expr (internal-read)))
(psd-cons (psd-make-symbol (token-start token)
(token-end token)
'unquote)
(psd-cons unquoted-expr
(psd-make-null (psd-expr-end unquoted-expr)
(psd-expr-end unquoted-expr))
(psd-expr-start unquoted-expr)
(psd-expr-end unquoted-expr))
(psd-expr-start unquoted-expr)
(psd-expr-end unquoted-expr))))
((eq? contents unquote-splicing-token)
(let ((unquoted-expr (internal-read)))
(psd-cons (psd-make-symbol (token-start token)
(token-end token)
'unquote-splicing)
(psd-cons unquoted-expr
(psd-make-null (psd-expr-end unquoted-expr)
(psd-expr-end unquoted-expr))
(psd-expr-start unquoted-expr)
(psd-expr-end unquoted-expr))
(psd-expr-start unquoted-expr)
(psd-expr-end unquoted-expr))))
((boolean? contents)
(psd-make-boolean
(token-start token)
(token-end token)
contents))
((string? contents)
(psd-make-string
(token-start token)
(token-end token)
contents))
((eof-object? contents)
contents))))
;; body of psd-read
(internal-read))))